/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#include "TestInc.h"
#include "BaseTokenStreamFixture.h"
#include "PersianAnalyzer.h"

using namespace Lucene;

typedef BaseTokenStreamFixture PersianAnalyzerTest;

/// These tests show how the combination of tokenization (breaking on zero-width
/// non-joiner), normalization (such as treating arabic YEH and farsi YEH the
/// same), and stopwords creates a light-stemming effect for verbs.
///
/// These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar

/// active present indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs1) {
    // "mi" + ZWNJ + "khord" is expected to analyze to the single token "khord".
    const uint8_t input[] = {
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf  // "khord"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active preterite indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs2) {
    // The bare form "khord" is expected to come back unchanged as one token.
    const uint8_t input[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active imperfective preterite indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs3) {
    // Same surface form as testBehaviorVerbs1: "mi" + ZWNJ + "khord" -> "khord".
    const uint8_t input[] = {
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf  // "khord"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active future indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs4) {
    // "khahad khord" is expected to analyze to the single token "khord".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf,  // "khahad"
        0x20,  // space
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf  // "khord"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active present progressive indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs5) {
    // "darad mi" + ZWNJ + "khord" is expected to analyze to the single token "khord".
    const uint8_t input[] = {
        0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf,  // "darad"
        0x20,  // space
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf  // "khord"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active preterite progressive indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs6) {
    // "dasht mi" + ZWNJ + "khord" is expected to analyze to the single token "khord".
    const uint8_t input[] = {
        0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa,  // "dasht"
        0x20,  // space
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf  // "khord"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active perfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs7) {
    // "khordeh" + ZWNJ + "ast" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa  // "ast"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active imperfective perfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs8) {
    // "mi" + ZWNJ + "khordeh" + ZWNJ + "ast" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa  // "ast"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active pluperfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs9) {
    // "khordeh bud" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf  // "bud"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active imperfective pluperfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs10) {
    // "mi" + ZWNJ + "khordeh bud" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf  // "bud"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active preterite subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbs11) {
    // "khordeh bashad" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf  // "bashad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active imperfective preterite subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbs12) {
    // "mi" + ZWNJ + "khordeh bashad" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf  // "bashad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active pluperfect subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbs13) {
    // "khordeh budeh bashad" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87,  // "budeh"
        0x20,  // space
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf  // "bashad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active imperfective pluperfect subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbs14) {
    // "mi" + ZWNJ + "khordeh budeh bashad" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87,  // "budeh"
        0x20,  // space
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf  // "bashad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive present indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs15) {
    // "khordeh mi" + ZWNJ + "shavad" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf  // "shavad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive preterite indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs16) {
    // "khordeh shod" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xb4, 0xd8, 0xaf  // "shod"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive imperfective preterite indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs17) {
    // "khordeh mi" + ZWNJ + "shod" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xb4, 0xd8, 0xaf  // "shod"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive perfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs18) {
    // "khordeh shodeh" + ZWNJ + "ast" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87,  // "shodeh"
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa  // "ast"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive imperfective perfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs19) {
    // "khordeh mi" + ZWNJ + "shodeh" + ZWNJ + "ast" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87,  // "shodeh"
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa  // "ast"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive pluperfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs20) {
    // "khordeh shodeh bud" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87,  // "shodeh"
        0x20,  // space
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf  // "bud"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive imperfective pluperfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs21) {
    // "khordeh mi" + ZWNJ + "shodeh bud" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87,  // "shodeh"
        0x20,  // space
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf  // "bud"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive future indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs22) {
    // "khordeh khahad shod" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf,  // "khahad"
        0x20,  // space
        0xd8, 0xb4, 0xd8, 0xaf  // "shod"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive present progressive indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs23) {
    // "darad khordeh mi" + ZWNJ + "shavad" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf,  // "darad"
        0x20,  // space
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf  // "shavad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive preterite progressive indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbs24) {
    // "dasht khordeh mi" + ZWNJ + "shod" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa,  // "dasht"
        0x20,  // space
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xb4, 0xd8, 0xaf  // "shod"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive present subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbs25) {
    // "khordeh shavad" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf  // "shavad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive preterite subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbs26) {
    // "khordeh shodeh bashad" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87,  // "shodeh"
        0x20,  // space
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf  // "bashad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive imperfective preterite subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbs27) {
    // "khordeh mi" + ZWNJ + "shodeh bashad" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87,  // "shodeh"
        0x20,  // space
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf  // "bashad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive pluperfect subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbs28) {
    // "khordeh shodeh budeh bashad" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87,  // "shodeh"
        0x20,  // space
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87,  // "budeh"
        0x20,  // space
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf  // "bashad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive imperfective pluperfect subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbs29) {
    // "khordeh mi" + ZWNJ + "shodeh budeh bashad" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd9, 0x85, 0xdb, 0x8c,  // "mi" (written with Farsi yeh, U+06CC)
        0xe2, 0x80, 0x8c,  // zero-width non-joiner (U+200C)
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87,  // "shodeh"
        0x20,  // space
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87,  // "budeh"
        0x20,  // space
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf  // "bashad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active present subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbs30) {
    // The prefixed form "bekhorad" is expected to come back unchanged as one token.
    const uint8_t input[] = {0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};
    const uint8_t expected[] = {0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// This test shows how the combination of tokenization and stopwords creates a
/// light-stemming effect for verbs.
///
/// In this case, these forms are presented with alternative orthography, using
/// arabic yeh and whitespace. This yeh phenomenon is common for legacy text
/// due to some previous bugs in Microsoft Windows.
///
/// These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar

/// active present indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective1) {
    // "mi khord", spelled with Arabic yeh and a plain space, is expected to
    // analyze to the single token "khord".
    const uint8_t input[] = {
        0xd9, 0x85, 0xd9, 0x8a,  // "mi" (written with Arabic yeh, U+064A)
        0x20,  // space
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf  // "khord"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active preterite indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective2) {
    // The bare form "khord" is expected to come back unchanged as one token.
    const uint8_t input[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active imperfective preterite indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective3) {
    // Same surface form as testBehaviorVerbsDefective1: "mi khord" -> "khord".
    const uint8_t input[] = {
        0xd9, 0x85, 0xd9, 0x8a,  // "mi" (written with Arabic yeh, U+064A)
        0x20,  // space
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf  // "khord"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active future indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective4) {
    // "khahad khord" is expected to analyze to the single token "khord".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf,  // "khahad"
        0x20,  // space
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf  // "khord"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active present progressive indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective5) {
    // "darad mi khord" (Arabic yeh, plain spaces) is expected to analyze to the single token "khord".
    const uint8_t input[] = {
        0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf,  // "darad"
        0x20,  // space
        0xd9, 0x85, 0xd9, 0x8a,  // "mi" (written with Arabic yeh, U+064A)
        0x20,  // space
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf  // "khord"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active preterite progressive indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective6) {
    // "dasht mi khord" (Arabic yeh, plain spaces) is expected to analyze to the single token "khord".
    const uint8_t input[] = {
        0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa,  // "dasht"
        0x20,  // space
        0xd9, 0x85, 0xd9, 0x8a,  // "mi" (written with Arabic yeh, U+064A)
        0x20,  // space
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf  // "khord"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active perfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective7) {
    // "khordeh ast" (plain space instead of ZWNJ) is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa  // "ast"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active imperfective perfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective8) {
    // "mi khordeh ast" (Arabic yeh, plain spaces) is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd9, 0x85, 0xd9, 0x8a,  // "mi" (written with Arabic yeh, U+064A)
        0x20,  // space
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa  // "ast"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active pluperfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective9) {
    // "khordeh bud" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf  // "bud"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active imperfective pluperfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective10) {
    // "mi khordeh bud" (Arabic yeh, plain spaces) is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd9, 0x85, 0xd9, 0x8a,  // "mi" (written with Arabic yeh, U+064A)
        0x20,  // space
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf  // "bud"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active preterite subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective11) {
    // "khordeh bashad" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf  // "bashad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active imperfective preterite subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective12) {
    // "mi khordeh bashad" (Arabic yeh, plain spaces) is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd9, 0x85, 0xd9, 0x8a,  // "mi" (written with Arabic yeh, U+064A)
        0x20,  // space
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf  // "bashad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active pluperfect subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective13) {
    // "khordeh budeh bashad" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87,  // "budeh"
        0x20,  // space
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf  // "bashad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// active imperfective pluperfect subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective14) {
    // "mi khordeh budeh bashad" (Arabic yeh, plain spaces) is expected to
    // analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd9, 0x85, 0xd9, 0x8a,  // "mi" (written with Arabic yeh, U+064A)
        0x20,  // space
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87,  // "budeh"
        0x20,  // space
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf  // "bashad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive present indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective15) {
    // "khordeh mi shavad" (Arabic yeh, plain spaces) is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd9, 0x85, 0xd9, 0x8a,  // "mi" (written with Arabic yeh, U+064A)
        0x20,  // space
        0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf  // "shavad"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive preterite indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective16) {
    // "khordeh shod" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xb4, 0xd8, 0xaf  // "shod"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive imperfective preterite indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective17) {
    // "khordeh mi shod" (Arabic yeh, plain spaces) is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd9, 0x85, 0xd9, 0x8a,  // "mi" (written with Arabic yeh, U+064A)
        0x20,  // space
        0xd8, 0xb4, 0xd8, 0xaf  // "shod"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive perfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective18) {
    // "khordeh shodeh ast" (plain spaces) is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87,  // "shodeh"
        0x20,  // space
        0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa  // "ast"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive imperfective perfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective19) {
    // "khordeh mi shodeh ast" (Arabic yeh, plain spaces) is expected to
    // analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd9, 0x85, 0xd9, 0x8a,  // "mi" (written with Arabic yeh, U+064A)
        0x20,  // space
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87,  // "shodeh"
        0x20,  // space
        0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa  // "ast"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive pluperfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective20) {
    // "khordeh shodeh bud" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87,  // "shodeh"
        0x20,  // space
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf  // "bud"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive imperfective pluperfect indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective21) {
    // "khordeh mi shodeh bud" (Arabic yeh, plain spaces) is expected to
    // analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd9, 0x85, 0xd9, 0x8a,  // "mi" (written with Arabic yeh, U+064A)
        0x20,  // space
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87,  // "shodeh"
        0x20,  // space
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf  // "bud"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive future indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective22) {
    // "khordeh khahad shod" is expected to analyze to the single token "khordeh".
    const uint8_t input[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87,  // "khordeh"
        0x20,  // space
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf,  // "khahad"
        0x20,  // space
        0xd8, 0xb4, 0xd8, 0xaf  // "shod"
    };
    const uint8_t expected[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, UTF8_TO_STRING(input), newCollection<String>(UTF8_TO_STRING(expected)));
}

/// passive present progressive indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective23) {
    // UTF-8 input: four words separated by spaces (0x20), one word per row.
    const uint8_t inputUtf8[] = {
        0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20,
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd9, 0x85, 0xd9, 0x8a, 0x20,
        0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf
    };
    // Only the second word is expected to come out as a token.
    const uint8_t expectedUtf8[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    const String input = UTF8_TO_STRING(inputUtf8);
    const String expectedTerm = UTF8_TO_STRING(expectedUtf8);

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, input, newCollection<String>(expectedTerm));
}

/// passive preterite progressive indicative
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective24) {
    // UTF-8 input: four words separated by spaces (0x20), one word per row.
    const uint8_t inputUtf8[] = {
        0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20,
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd9, 0x85, 0xd9, 0x8a, 0x20,
        0xd8, 0xb4, 0xd8, 0xaf
    };
    // Only the second word is expected to come out as a token.
    const uint8_t expectedUtf8[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    const String input = UTF8_TO_STRING(inputUtf8);
    const String expectedTerm = UTF8_TO_STRING(expectedUtf8);

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, input, newCollection<String>(expectedTerm));
}

/// passive present subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective25) {
    // UTF-8 input: two words separated by a space (0x20), one word per row.
    const uint8_t inputUtf8[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf
    };
    // Only the leading word is expected to come out as a token.
    const uint8_t expectedUtf8[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    const String input = UTF8_TO_STRING(inputUtf8);
    const String expectedTerm = UTF8_TO_STRING(expectedUtf8);

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, input, newCollection<String>(expectedTerm));
}

/// passive preterite subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective26) {
    // UTF-8 input: three words separated by spaces (0x20), one word per row.
    const uint8_t inputUtf8[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf
    };
    // Only the leading word is expected to come out as a token.
    const uint8_t expectedUtf8[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    const String input = UTF8_TO_STRING(inputUtf8);
    const String expectedTerm = UTF8_TO_STRING(expectedUtf8);

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, input, newCollection<String>(expectedTerm));
}

/// passive imperfective preterite subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective27) {
    // UTF-8 input: four words separated by spaces (0x20), one word per row.
    const uint8_t inputUtf8[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd9, 0x85, 0xd9, 0x8a, 0x20,
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf
    };
    // Only the leading word is expected to come out as a token.
    const uint8_t expectedUtf8[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    const String input = UTF8_TO_STRING(inputUtf8);
    const String expectedTerm = UTF8_TO_STRING(expectedUtf8);

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, input, newCollection<String>(expectedTerm));
}

/// passive pluperfect subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective28) {
    // UTF-8 input: four words separated by spaces (0x20), one word per row.
    const uint8_t inputUtf8[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf
    };
    // Only the leading word is expected to come out as a token.
    const uint8_t expectedUtf8[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    const String input = UTF8_TO_STRING(inputUtf8);
    const String expectedTerm = UTF8_TO_STRING(expectedUtf8);

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, input, newCollection<String>(expectedTerm));
}

/// passive imperfective pluperfect subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective29) {
    // UTF-8 input: five words separated by spaces (0x20), one word per row.
    const uint8_t inputUtf8[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd9, 0x85, 0xd9, 0x8a, 0x20,
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf
    };
    // Only the leading word is expected to come out as a token.
    const uint8_t expectedUtf8[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    const String input = UTF8_TO_STRING(inputUtf8);
    const String expectedTerm = UTF8_TO_STRING(expectedUtf8);

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, input, newCollection<String>(expectedTerm));
}

/// active present subjunctive
TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective30) {
    // Single-word UTF-8 input; the expected token is byte-for-byte the same word.
    const uint8_t inputUtf8[] = {
        0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf
    };
    const uint8_t expectedUtf8[] = {
        0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf
    };

    const String input = UTF8_TO_STRING(inputUtf8);
    const String expectedTerm = UTF8_TO_STRING(expectedUtf8);

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, input, newCollection<String>(expectedTerm));
}

/// These tests show how the combination of tokenization (breaking on zero-width
/// non-joiner or space) and stopwords creates a light-stemming effect for
/// nouns, removing the plural -ha.

TEST_F(PersianAnalyzerTest, testBehaviorNouns1) {
    // UTF-8 input: a word, a space (0x20), then a two-letter suffix word.
    const uint8_t inputUtf8[] = {
        0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf, 0x20,
        0xd9, 0x87, 0xd8, 0xa7
    };
    // Only the word before the space is expected to come out as a token.
    const uint8_t expectedUtf8[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf};

    const String input = UTF8_TO_STRING(inputUtf8);
    const String expectedTerm = UTF8_TO_STRING(expectedUtf8);

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, input, newCollection<String>(expectedTerm));
}

TEST_F(PersianAnalyzerTest, testBehaviorNouns2) {
    // UTF-8 input: a word, a zero-width non-joiner (0xe2 0x80 0x8c), then a
    // two-letter suffix.
    const uint8_t inputUtf8[] = {
        0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf,
        0xe2, 0x80, 0x8c,
        0xd9, 0x87, 0xd8, 0xa7
    };
    // Only the part before the non-joiner is expected to come out as a token.
    const uint8_t expectedUtf8[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf};

    const String input = UTF8_TO_STRING(inputUtf8);
    const String expectedTerm = UTF8_TO_STRING(expectedUtf8);

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, input, newCollection<String>(expectedTerm));
}

/// Test showing that non-Persian text is treated very much like SimpleAnalyzer (lowercased, etc)
TEST_F(PersianAnalyzerTest, testBehaviorNonPersian) {
    // Mixed-case English input with trailing punctuation; the expected tokens
    // are the two words, lowercased and without the period.
    const String input = L"English test.";

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesTo(analyzer, input, newCollection<String>(L"english", L"test"));
}

TEST_F(PersianAnalyzerTest, testReusableTokenStream1) {
    // UTF-8 input: five words separated by spaces (0x20), one word per row.
    const uint8_t inputUtf8[] = {
        0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd9, 0x85, 0xd9, 0x8a, 0x20,
        0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20,
        0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf
    };
    // Only the leading word is expected to come out as a token.
    const uint8_t expectedUtf8[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87};

    const String input = UTF8_TO_STRING(inputUtf8);
    const String expectedTerm = UTF8_TO_STRING(expectedUtf8);

    // Same kind of check as the verb tests, but through the reuse variant of the helper.
    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesToReuse(analyzer, input, newCollection<String>(expectedTerm));
}

TEST_F(PersianAnalyzerTest, testReusableTokenStream2) {
    // UTF-8 input: a word, a zero-width non-joiner (0xe2 0x80 0x8c), then a
    // two-letter suffix.
    const uint8_t inputUtf8[] = {
        0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf,
        0xe2, 0x80, 0x8c,
        0xd9, 0x87, 0xd8, 0xa7
    };
    // Only the part before the non-joiner is expected to come out as a token.
    const uint8_t expectedUtf8[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf};

    const String input = UTF8_TO_STRING(inputUtf8);
    const String expectedTerm = UTF8_TO_STRING(expectedUtf8);

    // Same kind of check as the noun tests, but through the reuse variant of the helper.
    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT);
    checkAnalyzesToReuse(analyzer, input, newCollection<String>(expectedTerm));
}

/// Test that custom stopwords work, and are not case-sensitive.
TEST_F(PersianAnalyzerTest, testCustomStopwords) {
    // Build the custom stop set from a small list of lowercase English words.
    Collection<String> stopList = newCollection<String>(L"the", L"and", L"a");
    HashSet<String> stopSet = HashSet<String>::newInstance(stopList.begin(), stopList.end());

    PersianAnalyzerPtr analyzer = newLucene<PersianAnalyzer>(LuceneVersion::LUCENE_CURRENT, stopSet);

    // The capitalised "The" in the input must still be dropped; the remaining
    // words are expected without the trailing period.
    const String input = L"The quick brown fox.";
    checkAnalyzesTo(analyzer, input, newCollection<String>(L"quick", L"brown", L"fox"));
}
